*This is a Stata do-file to replicate Table 4 in Feenstra-Li-Yu (2014, REStat)
*The code is by Robert Feenstra (UC-Davis), Zhiyuan Li (SHUFE), and Miaojie Yu (Peking Univ.)
*To run the code, please put all datasets in the same directory on your PC


********** STATA SETUP
* Start from an empty dataset. clear already removes all variables,
* so a separate drop _all is not needed.
clear

* The two resource settings below are best-effort. Recent Stata releases
* manage memory and matrix size automatically, while older releases or
* smaller flavors may refuse these exact values. capture noisily shows
* whatever Stata reports but lets the do-file continue instead of aborting
* before any estimation is run. If resources really are insufficient, the
* later use or estimation commands will stop with their own error.
capture noisily set memory 31g
capture noisily set matsize 10100
set more off

* Close any log left open by an earlier run, then start a fresh text log.
capture log close
log using Table4.out, text replace



********READ DATA******
* Load the estimation dataset from the current working directory.
use Table4

************
* There are in fact 5 steps to obtain the estimates in column (1) of Table 4 and the single estimating equation behind columns (2)-(3) of Table 4
*(1) The preliminary regression of firm TFP (called tfpop) on firm-level indicators, 4-digit industry indicators and ex-ante TFP (called tfpop2), and interactions between 2-digit industry indicators and other variables that appear on the right of Equ. (30) in the text
*(2) The selection equation (30) in the text using fitted TFP
*(3) The second-step Heckman equation excluding fitted TFP, used to obtain the predicted export share
*(4) The first step of the 2SLS estimates, see footnote 26 in the text for details
*(5) The second step of the 2SLS estimates, see footnote 26 in the text for details
* Panel bootstrapping by randomly drawing firms will be done over all five steps, which thereby corrects for clustering by firms, as shown in other files



***Step (1): Prelim Fit**********

* Create the 2-digit industry dummies (_Icic2d_*) and interact each of them
* with klratio. The interactions cover codes 14 to 37 and 39 to 42 and are
* created in ascending order, so that later klratio* varlists expand in the
* same order as before.
xi i.cic2d
foreach code of numlist 14/37 39/42 {
    generate klratio_Icic2d_`code' = klratio*_Icic2d_`code'
}

* Create the cic_adj industry dummies (_Icic_adj_*) used as regressors below.
xi i.cic_adj

* Fixed-effects regression of tfpop with newid as the panel identifier;
* output is suppressed.
quietly xtreg tfpop tfpop2 lntang tang_percent tang_dummy klratio* dyear* _Icic_adj*, fe i(newid)

* Fitted TFP (xbu prediction) for the observations used in the regression.
predict tfpp if e(sample), xbu







*****Type-II Tobit Estimates****
**Step (2): The Selection Equation with fitted TFP (tfpp)***

* Probit for the export indicator FX on fitted TFP and controls; output is
* suppressed.
xi: quietly probit FX tfpp tang_percent tang_dummy klratio* dyear2-dyear9 i.cic_adj

* Default probit prediction, stored as XI (used later to rescale the
* squared predicted export share).
predict XI

* Linear index of the probit.
predict PROBITXB, xb

* Standard normal density and distribution function evaluated at the index.
generate PDFPROBIT = (1/sqrt(2*_pi))*exp(-(PROBITXB^2/2))
generate CDFPROBIT = normprob(PROBITXB)

* Inverse Mills ratio: density divided by distribution function.
generate IMR_klratio = PDFPROBIT/CDFPROBIT
summarize IMR_klratio

***Step (3): the second-step Heckman Equation excluding fitted TFP, used to obtain predicted export share******************

* Regress the export share on the controls and the inverse Mills ratio
* (tfpp is left out), then keep the default prediction as expint_p.
xi: quietly regress expint tang_percent tang_dummy klratio* IMR_klratio dyear2-dyear9 i.cic_adj
predict expint_p

* Detailed summaries of the export share by sea / nosea group,
* first over all observations with the flag set ...
foreach grp in sea nosea {
    summarize expint if `grp'==1, detail
}

* ... and then restricted to observations with FX==1.
foreach grp in sea nosea {
    summarize expint if `grp'==1 & FX==1, detail
}

* Interactions of the predicted export share.
generate expintp_int = expint_p*int_usd
generate expintp_tang = expint_p*tang_percent

* Group-specific versions of expintp_int.
foreach grp in sea nosea {
    generate expintp_int_`grp' = expintp_int*`grp'
}

* Group-specific versions of expint_p itself.
foreach grp in sea nosea {
    generate expintp_`grp' = expint_p*`grp'
}


*****Generate variables for main estimates*********************

* Instrument base: exponential of tfpop2, and its interactions with the
* observed export share terms.
generate iv = exp(tfpop2)
generate iv_expint = iv*expint
generate iv_expintsq = iv*expintsq

* Interactions of the instrument with the predicted export share.
generate iv_expintp = iv*expint_p
generate iv_expintpsq = iv*(expint_p)^2

foreach grp in sea nosea {
    generate iv_expintp_`grp' = iv_expintp*`grp'
}

* Variance of the gap between observed and predicted export share among
* observations with FX==1, stored as a constant variable. The generate must
* directly follow the summarize because it reads r(Var).
generate diff = expint-expint_p
summarize diff if FX==1, detail
generate diffvar = r(Var)
summarize diffvar

* Mean observed export share among observations with FX==1, stored as a
* constant variable. The generate reads r(mean) from the summarize just above.
summarize expint if FX==1, detail
generate etamean = r(mean)
summarize etamean

* Squared predicted export share and its adjusted version: divided by XI
* and scaled by one plus diffvar over the squared etamean.
generate expintpsq = (expint_p)^2
generate expintsqp = expintpsq/XI*(1+diffvar/(etamean)^2)
summarize expintsqp expintpsq

* Interactions of the adjusted squared share.
generate expintsqp_int = expintsqp*int_usd

foreach grp in sea nosea {
    generate expintsqp_int_`grp' = expintsqp_int*`grp'
}

generate iv_expintsqp = iv*(expintsqp)
foreach grp in sea nosea {
    generate iv_expintsqp_`grp' = iv_expintsqp*`grp'
}

generate expintsqp_tang = expintsqp*tang_percent

****Step (4)-(5): 2SLS estimates of Equ. (25), shown as Column (1) of Table 4********
summarize expint if FX==1, detail

* Endogenous regressors: int_usd and its interactions with the predicted
* export share terms. Excluded instruments: iv and its matching interactions.
* Robust standard errors are requested.
xi: ivreg2 rev_usd ///
    (int_usd expintp_int expintsqp_int = iv iv_expintp iv_expintsqp) ///
    expint_p FX tang_percent expintp_tang expintsqp_tang tang_dummy ///
    i.cic1d dyear*, robust


****************A single estimation equation that covers Columns (2)-(3) of Table 4***

* Detailed summaries of the export share for FX==1 observations, by group.
foreach grp in sea nosea {
    summarize expint if `grp'==1 & FX==1, detail
}

* 1-digit industry dummies and their interactions with the predicted
* export share (categories 2 to 4).
xi i.cic1d
forvalues k = 2/4 {
    generate expintp_Icic1d_`k' = expint_p*_Icic1d_`k'
}

* Interactions of the predicted export share with year dummies 1 to 6.
forvalues y = 1/6 {
    generate expintp_dy`y' = expint_p*dyear`y'
}

* Same specification as column (1), with the export share terms split by
* sea / nosea and the extra industry and year interactions of expint_p as
* included regressors. Robust standard errors are requested.
xi: ivreg2 rev_usd ///
    (int_usd expintp_int_sea expintp_int_nosea expintsqp_int_sea expintsqp_int_nosea = iv iv_expintp_sea iv_expintp_nosea iv_expintsqp_sea iv_expintsqp_nosea) ///
    expintp_sea expintp_nosea expintp_Icic1d_2 expintp_Icic1d_3 expintp_Icic1d_4 expintp_dy* ///
    FX tang_percent expintp_tang expintsqp_tang tang_dummy i.cic1d dyear*, robust



********** CLOSE OUTPUT
* Empty the dataset and close the log opened at the top of the file.
drop _all
log close